# PACKAGES
suppressWarnings(suppressMessages({
library(reshape2)
library("phyloseq")
library("ggplot2")
library("dplyr")
library(tibble)
library(vegan)
library(compositions)
library(openxlsx)
library(DECIPHER)
library(pheatmap)
library("biomformat")
library(Biostrings)
library(decontam)
library(microDecon)
library(FSA)
library(ggpubr)
library(ggfortify)
library(eulerr)
library(microbiome)
}))
normalization <- function(phyloseq_object){
# normalization of phyloseq object
phyloseq_object_n <- phyloseq_object
asv_norm <- phyloseq_object_n@otu_table
taxa_norm <- phyloseq_object_n@tax_table
sums <- colSums(asv_norm[,])
for (i in 1:length(sums)) {
asv_norm[,i] <- asv_norm[,i]/sums[i]
}
where <- rowSums(asv_norm) !=0
asv_norm <- asv_norm[where,]
taxa_norm <- taxa_norm[where,]
phyloseq_object_n@otu_table <- asv_norm
phyloseq_object_n@tax_table <- taxa_norm
return(phyloseq_object_n)
}
normalization2 <- function(taxa_reads_table){
# normalization of dataframe
taxa_reads_table_n <- taxa_reads_table
asv_norm <- taxa_reads_table_n[,7:ncol(taxa_reads_table_n)]
sums <- colSums(asv_norm[,])
for (i in 1:length(sums)) {
asv_norm[,i] <- asv_norm[,i]/sums[i]
}
where <- rowSums(asv_norm) !=0
asv_norm <- asv_norm[where,]
taxa_reads_table_n <- taxa_reads_table_n[where,]
taxa_reads_table_n[,7:ncol(taxa_reads_table_n)] <- asv_norm
return(taxa_reads_table_n)
}
abundance_filtering <- function(phyloseq_object, to_filter= 0.01){
# abundance filtering of phyloseq object
phyloseq_object_n <- normalization(phyloseq_object)
filtering <- filter_taxa(phyloseq_object_n, function(x) sum(x > to_filter)>0 , FALSE)
phyloseq_object@otu_table <- phyloseq_object@otu_table[filtering,]
phyloseq_object@tax_table <- phyloseq_object@tax_table[filtering,]
return(phyloseq_object)
}
mock_zymo_genus_merging <- function(taxa_table,asv_table,zymo_genus){
# merging mock community samples with reference abundance
taxa_table[is.na(taxa_table)] <- "unassigned"
taxa_reads_table <- merge(taxa_table,asv_table, by="ASV", all=TRUE)
groups <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
columns <- names(taxa_reads_table)[9:ncol(taxa_reads_table)] # names of columns with samples
grouped_genus <- taxa_reads_table %>%
group_by_at(groups[1:6]) %>%
summarise(across(columns, sum))
mock_genus_n <- normalization2(grouped_genus)
to_filter <- (rowSums((mock_genus_n[,7:ncol(mock_genus_n)])>0.01))>0
mock_genus_n <- mock_genus_n[to_filter,]
mock_zymo_genus <- merge(mock_genus_n,zymo_genus, all=TRUE)
mock_zymo_genus[is.na(mock_zymo_genus)] <- 0
return(mock_zymo_genus)
}
genus_merging <- function(taxa_table,asv_table){
# merging taxonomy and filtering
taxa_table[is.na(taxa_table)] <- "unassigned"
taxa_reads_table <- merge(taxa_table,asv_table, by="ASV", all=TRUE)
groups <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
columns <- names(taxa_reads_table)[9:ncol(taxa_reads_table)] # names of columns with samples
grouped_genus <- taxa_reads_table %>%
group_by_at(groups[1:6]) %>%
summarise(across(columns, sum))
asv_table <- grouped_genus[,7:ncol(grouped_genus)]
asv_table["ASV"] <- paste0("GENUS", 1:nrow(asv_table))
taxa_table <- grouped_genus[,1:6]
taxa_table["ASV"] <- paste0("GENUS", 1:nrow(taxa_table))
# Step 1: Calculate abundance of each ASV within each sample
abundance <- asv_table[, -which(colnames(asv_table)=="ASV")] / colSums(asv_table[,-which(colnames(asv_table)=="ASV")]) * 100
# Step 2: Calculate prevalence of each ASV across all samples
prevalence <- rowSums(abundance > 0) / ncol(abundance) * 100
# Step 3: Filter out ASVs that are very low abundant and very low prevalent
threshold_abundance <- 0.01 # Abundance threshold (0.01%)
threshold_prevalence <- 10 # Prevalence threshold (10%)
# Filter ASVs based on abundance and prevalence
to_filter <- rowSums(abundance >= threshold_abundance) > 0 & prevalence >= threshold_prevalence
asv_table <- asv_table[to_filter, ]
taxa_table <- taxa_table[to_filter,]
# Output filtered ASV table
return(list(asv_table,taxa_table))
}
construct_phyloseq <- function(asv_table, taxa_table){
# construct phyloseq object - for mock communities (without metadata)
otu_mat <- asv_table
tax_mat <- taxa_table
rownames(otu_mat) <- 1:nrow(otu_mat)
rownames(tax_mat) <- 1:nrow(tax_mat)
otu_mat <- otu_mat %>%
tibble::column_to_rownames("ASV")
tax_mat <- tax_mat %>%
tibble::column_to_rownames("ASV")
otu_mat <- as.matrix(otu_mat)
tax_mat <- as.matrix(tax_mat)
OTU = otu_table(otu_mat, taxa_are_rows = TRUE)
TAX = tax_table(tax_mat)
object <- phyloseq(OTU, TAX)
return(object)
}
construct_phyloseq_real <- function(asv_table, taxa_table, metadata){
# construct phyloseq obect - for biopsy samples (with metadata)
otu_mat <- asv_table
tax_mat <- taxa_table
samples_df <- metadata
rownames(otu_mat) <- 1:nrow(otu_mat)
rownames(tax_mat) <- 1:nrow(tax_mat)
otu_mat <- otu_mat %>%
tibble::column_to_rownames("ASV")
tax_mat <- tax_mat %>%
tibble::column_to_rownames("ASV")
samples_df <- samples_df %>%
tibble::column_to_rownames("SampleID")
otu_mat <- as.matrix(otu_mat)
tax_mat <- as.matrix(tax_mat)
OTU = otu_table(otu_mat, taxa_are_rows = TRUE)
TAX = tax_table(tax_mat)
samples = sample_data(samples_df)
object <- phyloseq(OTU, TAX, samples)
return(object)
}
read_counts <- function(asv_table,input_numbers){
# function for visualizing the read counts (input vs retained)
counts <- as.data.frame(colSums(asv_table[,- grep("ASV",colnames(asv_table))]))
colnames(counts) <- "Count"
counts["Sample"] <- gsub("-","_",rownames(counts))
rownames(counts) <- counts$Sample
counts <- merge(counts,input_numbers,by='row.names',all=TRUE)
counts %>%
ggplot() +
geom_col( aes(x=reorder(Sample,input), y=input), alpha=.8, width=1, show.legend = TRUE,fill="#a5cee3") +
geom_text(aes(x=reorder(Sample,input), y=input,label = input), size = 3, hjust = -0.2) +
geom_col( aes(x=reorder(Sample,Count), y=Count), alpha=.8, width=0.5, show.legend = TRUE,fill="#e3211c") +
geom_text(aes(x=reorder(Sample,Count), y=Count,label = Count), size = 3, hjust = -0.1,color="#e32000") +
scale_fill_brewer(palette = "Set2") +
coord_flip() +
xlab("") +
labs(y = "Number of reads") +
theme(legend.position='none') +
theme_bw()
}
precision <- function(ps_object_filt, zymo_taxa){
# calculating precision
asv_filt <- as.data.frame(ps_object_filt@otu_table)
asv_filt <- asv_filt[,grep("Mock",colnames(asv_filt))]
asv_filt <- asv_filt[rowSums(asv_filt)>0,]
taxa_filt <- as.data.frame(ps_object_filt@tax_table)
taxa_filt <- taxa_filt[rownames(asv_filt),]
zymo_taxa <- zymo_taxa[nchar(zymo_taxa$Genus)>0,"Genus"]
precs <- c()
for (c in 1:ncol(asv_filt)){
taxa_sample <- taxa_filt[rownames(asv_filt)[(asv_filt[,colnames(asv_filt)[c]]>0)],"Genus"]
taxa_sample <- taxa_sample[!is.na(taxa_sample)]
tp <- sum(taxa_sample %in% zymo_taxa)
fp <- sum(!(taxa_sample %in% zymo_taxa))
prec = tp/(tp + fp)
precs <- c(precs,prec)
}
return(mean(precs))
}
recall <- function(ps_object_filt, zymo_taxa){
# calculating recall
asv_filt <- as.data.frame(ps_object_filt@otu_table)
asv_filt <- asv_filt[,grep("Mock",colnames(asv_filt))]
asv_filt <- asv_filt[rowSums(asv_filt)>0,]
taxa_filt <- as.data.frame(ps_object_filt@tax_table)
taxa_filt <- taxa_filt[rownames(asv_filt),]
zymo_taxa <- zymo_taxa[nchar(zymo_taxa$Genus)>0,"Genus"]
recs <- c()
for (c in 1:ncol(asv_filt)){
taxa_sample <- taxa_filt[rownames(asv_filt)[(asv_filt[,colnames(asv_filt)[c]]>0)],"Genus"]
taxa_sample <- taxa_sample[!is.na(taxa_sample)]
tp <- sum(taxa_sample %in% zymo_taxa)
fn <- sum(!(zymo_taxa %in% taxa_sample))
rec = tp/(tp+fn)
recs <- c(recs,rec)
}
return(mean(recs))
}
For each pipeline setting, the following is done:
Loading asv table and taxa table
Visualization of read counts before and after bioinformatics processing
Visualization of taxonomic composition (normalized and filtered data without transformation)
Merging data at genus level
Ordination analysis - Aitchison distance + PERMANOVA
Ordination analysis - Bray Curtis distance + PERMANOVA
Alpha diversity - Shannon index and Inverse Simpson index
Univariate analysis using Aldex2
PERMANOVA p-values of Aitchison distance are saved continually and output when processing the results at the end of the report. Differentially abundant taxa are continually saved in a table that is also outputted at the end of the report.
input_numbers <- read.csv("data/input_reads.csv", sep=",", row.names = 1)
rownames(input_numbers) <- regmatches(rownames(input_numbers), regexpr("(run\\d+_\\d+)", rownames(input_numbers)))
metadata <- read.csv("data/metadata.csv")
asv_table <- read.csv("data/results_dada2/1/final_seqtab_1.csv")
asv_table
taxa_table <- read.csv("data/results_dada2/1/taxa_1.csv", sep="\t", row.names = 1)
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(columns, sum)`.
## Caused by warning:
## ! Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(columns)
##
## # Now:
## data %>% select(all_of(columns))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Warning: Setting row names on a tibble is deprecated.
genus_dadas1_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
p_values_permanova <- c()
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.012
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1738
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 149, p-value = 0.1738
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1143
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 141, p-value = 0.1143
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
## Loading required package: zCompositions
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: NADA
## Loading required package: survival
##
## Attaching package: 'NADA'
## The following object is masked from 'package:IRanges':
##
## cor
## The following object is masked from 'package:S4Vectors':
##
## cor
## The following object is masked from 'package:compositions':
##
## cor
## The following object is masked from 'package:stats':
##
## cor
## Loading required package: truncnorm
## Loading required package: latticeExtra
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
final_which_genus <- data.frame()
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("DADA S1",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_dada2/12/final_seqtab_12.csv")
asv_table
taxa_table <- read.csv("data/results_dada2/12/taxa_12.csv", sep="\t", row.names = 1)
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_dadas12_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.03
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.2315
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 155, p-value = 0.2315
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1826
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 150, p-value = 0.1826
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("DADA S12",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_dada2/14/final_seqtab_14.csv", row.names = 1)
asv_table
taxa_table <- read.csv("data/results_dada2/14/taxa_14.csv", sep=",")
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_dadas14_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.017
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1918
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 151, p-value = 0.1918
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1143
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 141, p-value = 0.1143
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("DADA S14",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_vsearch/default/all.otutab.csv", sep="\t")
colnames(asv_table) <- c("ASV",colnames(asv_table)[-1])
asv_table
taxa_table_vsearch <- read.csv("data/results_vsearch/default/sintax.csv", sep=",", header = FALSE)
colnames(taxa_table_vsearch) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
taxa_table_vsearch
taxa_table <- data.frame(ASV = taxa_table_vsearch$ASV)
for (i in 2:8){
taxa_table[,i] <- regmatches(taxa_table_vsearch[,i], regexpr(":.+[(]", taxa_table_vsearch[,i]))
taxa_table[,i] <- substring(taxa_table[,i],2,nchar(taxa_table[,i])-1)
}
colnames(taxa_table) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
prob_table <- data.frame(ASV = taxa_table_vsearch$ASV)
for (i in 2:8){
prob_table[,i] <- regmatches(taxa_table_vsearch[,i], regexpr("[(].+[)]", taxa_table_vsearch[,i]))
prob_table[,i] <- substring(prob_table[,i],2,nchar(prob_table[,i])-1)
}
colnames(prob_table) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
# remove assignment with less than 0.7 probability
taxa_table[prob_table[,]<0.7] <- NA
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_vsearchd_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.174
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.862
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 207, p-value = 0.862
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.5648
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 222, p-value = 0.5648
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("VSEARCH DEFAULT",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_vsearch/6/all.otutab.csv", sep="\t")
colnames(asv_table) <- c("ASV",colnames(asv_table)[-1])
asv_table
taxa_table_vsearch <- read.csv("data/results_vsearch/6/sintax.csv", sep=",", header = FALSE)
colnames(taxa_table_vsearch) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
taxa_table_vsearch
taxa_table <- data.frame(ASV = taxa_table_vsearch$ASV)
for (i in 2:8){
taxa_table[,i] <- regmatches(taxa_table_vsearch[,i], regexpr(":.+[(]", taxa_table_vsearch[,i]))
taxa_table[,i] <- substring(taxa_table[,i],2,nchar(taxa_table[,i])-1)
}
colnames(taxa_table) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
prob_table <- data.frame(ASV = taxa_table_vsearch$ASV)
for (i in 2:8){
prob_table[,i] <- regmatches(taxa_table_vsearch[,i], regexpr("[(].+[)]", taxa_table_vsearch[,i]))
prob_table[,i] <- substring(prob_table[,i],2,nchar(prob_table[,i])-1)
}
colnames(prob_table) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
# remove assignment with less than 0.7 probability
taxa_table[prob_table[,]<0.7] <- NA
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_vsearchs6_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.124
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = FALSE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.7381
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 213, p-value = 0.7381
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.4612
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 228, p-value = 0.4612
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("VSEARCH S6",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_vsearch/9/all.otutab.csv", sep="\t")
colnames(asv_table) <- c("ASV",colnames(asv_table)[-1])
asv_table
taxa_table_vsearch <- read.csv("data/results_vsearch/9/sintax.csv", sep=",", header = FALSE)
colnames(taxa_table_vsearch) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
taxa_table_vsearch
taxa_table <- data.frame(ASV = taxa_table_vsearch$ASV)
for (i in 2:8){
taxa_table[,i] <- regmatches(taxa_table_vsearch[,i], regexpr(":.+[(]", taxa_table_vsearch[,i]))
taxa_table[,i] <- substring(taxa_table[,i],2,nchar(taxa_table[,i])-1)
}
colnames(taxa_table) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
prob_table <- data.frame(ASV = taxa_table_vsearch$ASV)
for (i in 2:8){
prob_table[,i] <- regmatches(taxa_table_vsearch[,i], regexpr("[(].+[)]", taxa_table_vsearch[,i]))
prob_table[,i] <- substring(prob_table[,i],2,nchar(prob_table[,i])-1)
}
colnames(prob_table) <- c("ASV","Domain","Phylum","Class","Order", "Family","Genus","Species")
# remove assignment with less than 0.7 probability
taxa_table[prob_table[,]<0.7] <- NA
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_vsearchs9_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.247
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.7994
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 210, p-value = 0.7994
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.7381
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 213, p-value = 0.7381
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("VSEARCH S9",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_vsearch/default/all.otutab.csv", sep="\t")
colnames(asv_table) <- c("ASV",colnames(asv_table)[-1])
asv_table
taxa_table <- read.csv("data/results_vsearch/default/taxa_idtaxa.csv", sep="\t")
colnames(taxa_table) <- c("ASV",colnames(taxa_table)[-1])
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_vsearchd_idtaxa_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.174
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.2211
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 154, p-value = 0.2211
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1918
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 151, p-value = 0.1918
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("VSEARCH DEFAULT IDTAXA",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_vsearch/6/all.otutab.csv", sep="\t")
colnames(asv_table) <- c("ASV",colnames(asv_table)[-1])
asv_table
taxa_table <- read.csv("data/results_vsearch/6/taxa_idtaxa.csv", sep="\t")
colnames(taxa_table) <- c("ASV",colnames(taxa_table)[-1])
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_vsearchs6_idtaxa_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.124
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = FALSE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1572
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 147, p-value = 0.1572
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1738
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 149, p-value = 0.1738
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("VSEARCH S6 IDTAXA",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_vsearch/9/all.otutab.csv", sep="\t")
colnames(asv_table) <- c("ASV",colnames(asv_table)[-1])
asv_table
taxa_table <- read.csv("data/results_vsearch/9/taxa_idtaxa.csv", sep="\t")
colnames(taxa_table) <- c("ASV",colnames(taxa_table)[-1])
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_vsearchs9_idtaxa_object <- genus_object
ORDINATION
PERMANOVA revealed a significant difference between groups.
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.247
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.2766
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 159, p-value = 0.2766
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.3141
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 162, p-value = 0.3141
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("VSEARCH S9 IDTAXA",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
asv_table <- read.csv("data/results_deblur/default/asv_table.csv", sep=",")
colnames(asv_table) <- gsub("[.]","_",colnames(asv_table))
asv_table
taxa_table <- read.csv("data/results_deblur/default/taxa_default.csv", sep="\t", row.names = 1)
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_deblurd_object <- genus_object
ORDINATION
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.013
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1918
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 152, p-value = 0.2012
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = 0.1417
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 148, p-value = 0.1653
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
if (length(which_genus)>0){
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("DEBLUR DEFAULT",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
}
asv_table <- read.csv("data/results_deblur/2/asv_table.csv", sep=",")
colnames(asv_table) <- gsub("[.]","_",colnames(asv_table))
asv_table
taxa_table <- read.csv("data/results_deblur/2/taxa_2.csv", sep="\t", row.names = 1)
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_deblurs2_object <- genus_object
ORDINATION
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.012
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = p-value = 0.1826
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 152, p-value = 0.2012
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = p-value = 0.1493
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 153, p-value = 0.211
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
if (length(which_genus)){
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("DEBLUR S2",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
}
asv_table <- read.csv("data/results_deblur/8/asv_table.csv", sep=",")
colnames(asv_table) <- gsub("[.]","_",colnames(asv_table))
asv_table
taxa_table <- read.csv("data/results_deblur/8/taxa_8.csv", sep="\t", row.names = 1)
taxa_table
READ COUNTS
read_counts(asv_table,input_numbers)
Phyloseq object, filtering
# phyloseq object
ps_object <- construct_phyloseq(asv_table, taxa_table)
# filtering
ps_object_filt <- abundance_filtering(ps_object)
#nornalization
ps_object_n <- normalization(ps_object_filt)
BARPLOT
# plot with reference
p1 <- plot_bar(ps_object_n, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
p2 <- plot_bar(ps_object_filt, fill = "Genus") +
geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack") +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(legend.position = "none")
ggarrange(p1,p2,
ncol = 1, nrow = 2)
GENUS
my_list <- genus_merging(taxa_table,asv_table)
## `summarise()` has grouped output by 'Domain', 'Phylum', 'Class', 'Order',
## 'Family'. You can override using the `.groups` argument.
asv_table <- my_list[[1]]
taxa_table <- my_list[[2]]
genus_object <- construct_phyloseq_real(asv_table = asv_table, taxa_table = taxa_table, metadata = metadata)
## Warning: Setting row names on a tibble is deprecated.
## Setting row names on a tibble is deprecated.
genus_deblurs8_object <- genus_object
ORDINATION
# aitchison
ps_t <- microbiome::transform(genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Type"] <- ps_t@sam_data$Type
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Type")] ~ Type, data = data_for_pca, method="euclidean")
test_permanova
p_values_permanova <- c(p_values_permanova,test_permanova$`Pr(>F)`[1])
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Type,show.legend = FALSE)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")
# bray curtis
ps_norm <-normalization(genus_object)
data_for_pca_norm <- as.data.frame(t(ps_norm@otu_table))
data_for_pca_norm["Type"] <- ps_norm@sam_data$Type
ord_b <- ordinate(ps_norm, method = "PCoA", distance = "bray")
ord_b_dist <- vegdist(t(as.data.frame(ps_norm@otu_table)), method="bray")
ps_norm_meta <- data.frame(Type = ps_norm@sam_data$Type)
test_permanova <- adonis2(t(ps_norm@otu_table) ~ Type, data = ps_norm_meta, method="bray") # 0.007
test_permanova
imp_bray <- ord_b$values$Relative_eig
pca_bray <- ord_b$vectors
ggplot(data_for_pca_norm, aes(x=pca_bray[,1],y=pca_bray[,2], color= Type)) +
geom_point(show.legend = TRUE) + stat_ellipse() +
xlab(paste("PCo1 ", "(",round(imp_bray[1]*100,2),"%", ")", sep=""))+
ylab(paste("PCo2 ", "(",round(imp_bray[2]*100,2),"%", ")", sep=""))+
theme_bw() +
labs(title="PCoA on Bray-Curtis")
ALPHA DIVERSITY
Mann Whitney showed no significant difference in alpha diversity.
alpha_meas = c("Shannon", "InvSimpson")
richness <- estimate_richness(genus_object, measures = alpha_meas)
## Warning in estimate_richness(genus_object, measures = alpha_meas): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
richness["Type"] <- genus_object@sam_data$Type
p<-plot_richness(genus_object, "Type", "Type", measures=alpha_meas)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p + geom_boxplot(data=p$sata, aes(x=Type, y=value, color=NULL), alpha=0.1) +
theme_bw() +
scale_x_discrete(guide = guide_axis(angle = 0))
wilcox.test(`Shannon` ~ `Type`, data = richness, paired=FALSE) # p-value = p-value = 0.2888
##
## Wilcoxon rank sum exact test
##
## data: Shannon by Type
## W = 162, p-value = 0.3141
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(`InvSimpson` ~ `Type`, data = richness, paired=FALSE) # p-value = p-value = 0.211
##
## Wilcoxon rank sum exact test
##
## data: InvSimpson by Type
## W = 157, p-value = 0.2534
## alternative hypothesis: true location shift is not equal to 0
ALDEX
# ALDEX
my_data_for <- genus_object@otu_table
my_metadata <- genus_object@sam_data
library(ALDEx2)
x.aldex <- aldex(my_data_for, my_metadata$Type, mc.samples=128, test="t", effect=TRUE,
include.sample.summary=FALSE, verbose=FALSE, paired.test=FALSE)
## aldex.clr: generating Monte-Carlo instances and clr values
## conditions vector supplied
## operating in serial mode
## computing center with all features
## aldex.ttest: doing t-test
## aldex.effect: calculating effect sizes
aldex.plot(x.aldex, type="MA", test="wilcox", xlab="Log-ratio abundance",
ylab="Difference", main='MA plot',all.cex=1)
aldex.plot(x.aldex, type="MW", test="wilcox", xlab="Dispersion",
ylab="Difference", main='MW (Effect) plot',all.cex=1)
aldex.plot(x.aldex, type="volcano", test="welch", xlab="Difference",
ylab="-1(log10(q))", main='Volcano plot')
which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05)
## integer(0)
diff_asv <- x.aldex[which(x.aldex$we.eBH < 0.05 & x.aldex$wi.eBH <0.05),]
diff_asv <- row.names(diff_asv)
if (length(diff_asv)>0){
diff_asv <- genus_object@tax_table[diff_asv,]
}
diff_asv
## character(0)
x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),]
which_genus <- rownames(x.aldex[which(x.aldex$we.ep < 0.05 & x.aldex$wi.ep <0.05),])
if (length(which_genus)){
which_genus <- as.data.frame(genus_object@tax_table[rownames(genus_object@tax_table) %in% which_genus,])
which_genus["Pipeline"] <- rep("DEBLUR S8",nrow(which_genus))
final_which_genus <- rbind(final_which_genus,which_genus)
}
Significantly different abundant taxa, result table for all pipelines.
final_which_genus
#DADA2 S1
sample_names(genus_dadas1_object) <- paste(sample_names(genus_dadas1_object),"DADA2 S1")
dadas1 <- as.data.frame(genus_dadas1_object@tax_table)
dadas1_asv <- as.data.frame(genus_dadas1_object@otu_table)
rownames(dadas1) <- apply(dadas1, 1, function(x) paste(x, collapse = "/"))
rownames(dadas1_asv) <- rownames(dadas1)
#DADA2 S12
sample_names(genus_dadas12_object) <- paste(sample_names(genus_dadas12_object),"DADA2 S12")
dadas12 <- as.data.frame(genus_dadas12_object@tax_table)
dadas12_asv <- as.data.frame(genus_dadas12_object@otu_table)
rownames(dadas12) <- apply(dadas12, 1, function(x) paste(x, collapse = "/"))
rownames(dadas12_asv) <- rownames(dadas12)
#DADA2 S14
sample_names(genus_dadas14_object) <- paste(sample_names(genus_dadas14_object),"DADA2 S14")
dadas14 <- as.data.frame(genus_dadas14_object@tax_table)
dadas14_asv <- as.data.frame(genus_dadas14_object@otu_table)
rownames(dadas14) <- apply(dadas14, 1, function(x) paste(x, collapse = "/"))
rownames(dadas14_asv) <- rownames(dadas14)
#VSEARCH DEFAULT
sample_names(genus_vsearchd_object) <- paste(sample_names(genus_vsearchd_object),"VSEARCH DEFAULT")
vsearchd <- as.data.frame(genus_vsearchd_object@tax_table)
vsearchd_asv <- as.data.frame(genus_vsearchd_object@otu_table)
rownames(vsearchd) <- apply(vsearchd, 1, function(x) paste(x, collapse = "/"))
rownames(vsearchd_asv) <- rownames(vsearchd)
#VSEARCH S6
sample_names(genus_vsearchs6_object) <- paste(sample_names(genus_vsearchs6_object),"VSEARCH S6")
vsearchs6 <- as.data.frame(genus_vsearchs6_object@tax_table)
vsearchs6_asv <- as.data.frame(genus_vsearchs6_object@otu_table)
rownames(vsearchs6) <- apply(vsearchs6, 1, function(x) paste(x, collapse = "/"))
rownames(vsearchs6_asv) <- rownames(vsearchs6)
#VSEARCH S9
sample_names(genus_vsearchs9_object) <- paste(sample_names(genus_vsearchs9_object),"VSEARCH S9")
vsearchs9 <- as.data.frame(genus_vsearchs9_object@tax_table)
vsearchs9_asv <- as.data.frame(genus_vsearchs9_object@otu_table)
rownames(vsearchs9) <- apply(vsearchs9, 1, function(x) paste(x, collapse = "/"))
rownames(vsearchs9_asv) <- rownames(vsearchs9)
#VSEARCH DEFAULT IDTAXA
sample_names(genus_vsearchd_idtaxa_object) <- paste(sample_names(genus_vsearchd_idtaxa_object),"VSEARCH DEFAULT IDTAXA")
vsearchd_idtaxa <- as.data.frame(genus_vsearchd_idtaxa_object@tax_table)
vsearchd_idtaxa_asv <- as.data.frame(genus_vsearchd_idtaxa_object@otu_table)
rownames(vsearchd_idtaxa) <- apply(vsearchd_idtaxa, 1, function(x) paste(x, collapse = "/"))
rownames(vsearchd_idtaxa_asv) <- rownames(vsearchd_idtaxa)
#VSEARCH S6 IDTAXA
sample_names(genus_vsearchs6_idtaxa_object) <- paste(sample_names(genus_vsearchs6_idtaxa_object),"VSEARCH S6 IDTAXA")
vsearchs6_idtaxa <- as.data.frame(genus_vsearchs6_idtaxa_object@tax_table)
vsearchs6_idtaxa_asv <- as.data.frame(genus_vsearchs6_idtaxa_object@otu_table)
rownames(vsearchs6_idtaxa) <- apply(vsearchs6_idtaxa, 1, function(x) paste(x, collapse = "/"))
rownames(vsearchs6_idtaxa_asv) <- rownames(vsearchs6_idtaxa)
#VSEARCH S9 IDTAXA
sample_names(genus_vsearchs9_idtaxa_object) <- paste(sample_names(genus_vsearchs9_idtaxa_object),"VSEARCH S9 IDTAXA")
vsearchs9_idtaxa <- as.data.frame(genus_vsearchs9_idtaxa_object@tax_table)
vsearchs9_idtaxa_asv <- as.data.frame(genus_vsearchs9_idtaxa_object@otu_table)
rownames(vsearchs9_idtaxa) <- apply(vsearchs9_idtaxa, 1, function(x) paste(x, collapse = "/"))
rownames(vsearchs9_idtaxa_asv) <- rownames(vsearchs9_idtaxa)
#DEBLUR DEFAULT
sample_names(genus_deblurd_object) <- paste(sample_names(genus_deblurd_object),"Deblur DEFAULT")
deblurd <- as.data.frame(genus_deblurd_object@tax_table)
deblurd_asv <- as.data.frame(genus_deblurd_object@otu_table)
rownames(deblurd) <- apply(deblurd, 1, function(x) paste(x, collapse = "/"))
rownames(deblurd_asv) <- rownames(deblurd)
#DEBLUR S2
sample_names(genus_deblurs2_object) <- paste(sample_names(genus_deblurs2_object),"Deblur S2")
deblurs2 <- as.data.frame(genus_deblurs2_object@tax_table)
deblurs2_asv <- as.data.frame(genus_deblurs2_object@otu_table)
rownames(deblurs2) <- apply(deblurs2, 1, function(x) paste(x, collapse = "/"))
rownames(deblurs2_asv) <- rownames(deblurs2)
#DEBLUR S8
sample_names(genus_deblurs8_object) <- paste(sample_names(genus_deblurs8_object),"Deblur S8")
deblurs8 <- as.data.frame(genus_deblurs8_object@tax_table)
deblurs8_asv <- as.data.frame(genus_deblurs8_object@otu_table)
rownames(deblurs8) <- apply(deblurs8, 1, function(x) paste(x, collapse = "/"))
rownames(deblurs8_asv) <- rownames(deblurs8)
#MERGING
all_asv <- merge(dadas1_asv,dadas12_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,dadas14_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,vsearchd_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,vsearchs6_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,vsearchs9_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,vsearchd_idtaxa_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,vsearchs6_idtaxa_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,vsearchs9_idtaxa_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,deblurd_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,deblurs2_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv <- merge(all_asv,deblurs8_asv, by='row.names', all=TRUE)
row.names(all_asv) <- all_asv[,1]
all_asv <- all_asv[, -1]
all_asv[is.na(all_asv)] <- 0
all_taxa <- as.data.frame(t(as.data.frame(strsplit(rownames(all_asv),"/"))))
all_asv["ASV"] <- paste0("GENUS",1:nrow(all_asv))
all_taxa["ASV"] <- paste0("GENUS",1:nrow(all_taxa))
all_metadata <- data.frame(SampleID=colnames(all_asv)[-191],
Pipeline=regmatches(colnames(all_asv), regexpr("(DADA.+)|(VSEARCH.+)|(Deblur.+)", colnames(all_asv))))
all_genus_object <- construct_phyloseq_real(asv_table = all_asv, taxa_table =all_taxa, metadata = all_metadata )
Results of PERMANOVA across pipelines:
p_values_permanova
## [1] 0.026 0.023 0.024 0.022 0.014 0.015 0.024 0.020 0.018 0.044 0.030 0.024
The alpha and beta diversity measures in PSC and control samples assessed according to data obtained from different pipelines were similar. The Shannon index showed no significant difference between the PSC and AUD samples. However, PERMANOVA revealed significant differences between these two groups, except with the Deblur pipeline set to default settings, where the PERMANOVA was not significant (P=0.052).
The Venn diagram depicts how dissimilar the default settings of individual pipelines were.
list_core <- c() # an empty object to store information
group_list <- unique(all_genus_object@sam_data$Pipeline)
for (n in group_list){
ps.sub <- subset_samples(all_genus_object, Pipeline == n)
core_m <- core_members(ps.sub,
detection = 0,
prevalence = 0)
print(paste0("No. of core taxa in ", n, " : ", length(core_m)))
list_core[[n]] <- core_m
}
## [1] "No. of core taxa in DADA2 S1 : 144"
## [1] "No. of core taxa in DADA2 S12 : 149"
## [1] "No. of core taxa in DADA2 S14 : 154"
## [1] "No. of core taxa in VSEARCH DEFAULT : 145"
## [1] "No. of core taxa in VSEARCH S6 : 144"
## [1] "No. of core taxa in VSEARCH S9 : 155"
## [1] "No. of core taxa in VSEARCH DEFAULT IDTAXA : 132"
## [1] "No. of core taxa in VSEARCH S6 IDTAXA : 130"
## [1] "No. of core taxa in VSEARCH S9 IDTAXA : 132"
## [1] "No. of core taxa in Deblur DEFAULT : 122"
## [1] "No. of core taxa in Deblur S2 : 119"
## [1] "No. of core taxa in Deblur S8 : 120"
p1 <- plot(venn(list_core[c(1,2,3)]))
p2 <- plot(venn(list_core[c(4,5,6)]))
p3 <- plot(venn(list_core[c(7,8,9)]))
ggarrange(p1,p2,p3,
labels = c("A", "B","C"),
ncol = 3, nrow = 1)
p1 <- plot(venn(list_core[c(1,4,7)]))
p2 <- plot(venn(list_core[c(2,5,8)]))
p3 <- plot(venn(list_core[c(3,6,9)]))
ggarrange(p1,p2,p3,
labels = c("A", "B","C"),
ncol = 3, nrow = 1)
p1 <- plot(venn(list_core[c(2,3,5,6,8)]))
p2 <- plot(venn(list_core[c(2,3,5,6,9)]))
ggarrange(p1,p2,
labels = c("A", "B"),
ncol = 2, nrow = 1)
p1 <- plot(venn(list_core[c(5,6,8,9)]))
p2<- plot(venn(list_core[c(1,4,7,10)]))
ggarrange(p1,p2,
labels = c("A", "B"),
ncol = 2, nrow = 1)
Heatmap visualizes genera that was detected by individual pipelines. Only 47.94% of the revealed genera were common acrossall 12 tested pipelines, and 56.70% were common among pipelines that employed the same classifier.
matrix_data <- sapply(list_core, function(x) {
sapply(paste("GENUS", 1:194, sep = ""), function(y) as.numeric(y %in% x))
})
# Convert the matrix to a data frame
df <- as.data.frame(matrix_data)
pheatmap(df, cluster_rows = TRUE, cluster_cols = FALSE,show_rownames = FALSE)
print(sum(apply(df,1,sum) == 12)/nrow(df))
## [1] 0.4793814
where <- c(grep("VSEARCH DEFAULT$",colnames(df)),grep("VSEARCH S6$",colnames(df)),grep("VSEARCH S9$",colnames(df)))
df2 <- df[!(apply(df,1,sum)==0),-where]
print(sum(apply(df2,1,sum) == 9)/nrow(df2))
## [1] 0.5670103
The comparative analysis of biopsy samples revealed that the biggest difference among the compared pipelines was introduced by the taxonomic classifier. As observed in the PCA analysis, an apparent cluster of samples was present for the pipeline that utilized the SINTAX taxonomic classifier.
# aitchison
ps_t <- microbiome::transform(all_genus_object, 'clr')
data_for_pca <- as.data.frame(t(ps_t@otu_table))
data_for_pca["Pipeline"] <- ps_t@sam_data$Pipeline
data_for_pca["Group"] <- my_metadata[regmatches(rownames(data_for_pca), regexpr("run\\d+_\\d+", rownames(data_for_pca))),"Type"]
ord_clr <- ordinate(ps_t, method = "PCoA", distance = "euclidean")
test_permanova <- adonis2(data_for_pca[,-which(colnames(data_for_pca)=="Pipeline" | colnames(data_for_pca)=="Group")] ~ Pipeline, data = data_for_pca, method="euclidean")
test_permanova
data_for_pca["Classifier"] <- "IDTAXA"
data_for_pca[grep("VSEARCH",data_for_pca$Pipeline),"Classifier"] <- "SINTAX"
data_for_pca[grep("VSEARCH .+ IDTAXA",data_for_pca$Pipeline),"Classifier"] <- "IDTAXA"
pca_res <- prcomp(t(ps_t@otu_table))
imp <- round(summary(pca_res)$importance[2,]*100,2)
ggplot(data_for_pca, aes(x=pca_res$x[,1],y=pca_res$x[,2], color= Pipeline, shape=Classifier,show.legend = FALSE)) +
geom_point(show.legend = TRUE, size=3) +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() + scale_color_manual(values=c(
"#1E75B0", "#ABC3E3", "#FA7D0E","#FAB776",
"#2B9D2B", "#D90368", "#D22627","#FA9593",
"#9165B9", "#FAF33E",
"#DF75BE", "#F2B2CE"))
ggplot(data_for_pca, aes(x=pca_res$x[,2],y=pca_res$x[,3], color= Group, show.legend = FALSE)) +
geom_point(show.legend = TRUE, size=3) +
xlab(paste("PC1 ", "(",imp[1],"%", ")", sep=""))+
ylab(paste("PC2 ", "(",imp[2],"%", ")", sep="")) +
theme_bw() +
labs(title="PCA on clr-transformed data")